
% Journal article. Hinton's initials are "G. E.", matching rumelhart_learning_1985.
@article{ackley_learning_1985,
  author  = {Ackley, D. H. and Hinton, G. E. and Sejnowski, T. J.},
  title   = {A Learning Algorithm for {{Boltzmann}} Machines},
  journal = {Cognitive Science},
  volume  = {9},
  number  = {1},
  pages   = {147--169},
  year    = {1985}
}

% PhD thesis. "address" holds the school's city rather than a country/state label.
% The braces in "type" keep sentence-casing styles from lowercasing "PhD Thesis".
@phdthesis{anderson_learning_1986,
  author  = {Anderson, C. W.},
  title   = {Learning and Problem Solving with Multilayer Connectionist Systems},
  school  = {University of Massachusetts},
  address = {{Amherst, MA}},
  type    = {{PhD Thesis}},
  year    = {1986}
}

% Conference paper published in workshop proceedings.
@inproceedings{anderson_strategy_1987,
  author    = {Anderson, C. W.},
  title     = {Strategy Learning with Multilayer Connectionist Representations},
  booktitle = {Proceedings of the {{Fourth International Workshop}} on {{Machine Learning}}},
  pages     = {103--114},
  publisher = {{Morgan Kaufmann}},
  address   = {{Irvine, CA}},
  year      = {1987}
}

% Journal article; no issue number or page range is recorded for this entry.
@article{barto_learning_1985,
  author  = {Barto, A. G.},
  title   = {Learning by Statistical Cooperation of Self-Interested Neuron-like Computing Elements},
  journal = {Human Neurobiology},
  volume  = {4},
  year    = {1985}
}

% Journal article. The journal name is a comma-separated list: "Systems, Man, and Cybernetics".
@article{barto_neuronlike_1983,
  author  = {Barto, A. G. and Sutton, R. S. and Anderson, C. W.},
  title   = {Neuronlike Elements That Can Solve Difficult Learning Control Problems},
  journal = {IEEE Transactions on Systems, Man, and Cybernetics},
  volume  = {13},
  number  = {5},
  pages   = {834--846},
  year    = {1983}
}

% PhD thesis. "address" holds the school's city rather than a country/state label.
% The braces in "type" keep sentence-casing styles from lowercasing "PhD Thesis".
@phdthesis{booker_intelligent_1982,
  author  = {Booker, L. B.},
  title   = {Intelligent Behavior as an Adaptation to the Task Environment},
  school  = {University of Michigan},
  address = {{Ann Arbor, MI}},
  type    = {{PhD Thesis}},
  year    = {1982}
}

% Chapter in an edited book; no page range is recorded for this entry.
@incollection{christensen_learning_1986,
  author    = {Christensen, J.},
  title     = {Learning Static Evaluation Functions by Linear Regression},
  editor    = {Mitchell, T. M. and Carbonell, J. G. and Michalski, R. S.},
  booktitle = {Machine Learning: {{A}} Guide to Current Research},
  publisher = {{Kluwer Academic}},
  address   = {{Boston}},
  year      = {1986}
}

% Conference paper published in proceedings.
@inproceedings{christensen_unified_1986,
  author    = {Christensen, J. and Korf, R. E.},
  title     = {A Unified Theory of Heuristic Evaluation Functions and Its Application to Learning},
  booktitle = {Proceedings of the {{Fifth National Conference}} on {{Artificial Intelligence}}},
  pages     = {148--152},
  publisher = {{Morgan Kaufmann}},
  address   = {{Philadelphia, PA}},
  year      = {1986}
}

% Book (monograph).
@book{denardo_dynamic_1982,
  author    = {Denardo, E. V.},
  title     = {Dynamic Programming: {{Models}} and Applications},
  publisher = {{Prentice-Hall}},
  address   = {{Englewood Cliffs, NJ}},
  year      = {1982}
}

% Chapter in an edited book. "volume" identifies which volume of the
% multi-volume series this is; no page range is recorded.
@incollection{dietterich_learning_1986,
  author    = {Dietterich, T. G. and Michalski, R. S.},
  title     = {Learning to Predict Sequences},
  editor    = {Michalski, R. S. and Carbonell, J. G. and Mitchell, T. M.},
  booktitle = {Machine Learning: {{An}} Artificial Intelligence Approach},
  volume    = {2},
  publisher = {{Morgan Kaufmann}},
  address   = {{Los Altos, CA}},
  year      = {1986}
}

% Chapter in an edited book; no page range is recorded for this entry.
@incollection{gelperin_logic_1985,
  author    = {Gelperin, A. and Hopfield, J. J. and Tank, D. W.},
  title     = {The Logic of {{Limax}} Learning},
  editor    = {Selverston, A.},
  booktitle = {Model Neural Networks and Behavior},
  publisher = {{Plenum Press}},
  address   = {{New York}},
  year      = {1985}
}

% Journal article; no issue number or page range is recorded for this entry.
@article{hampson_disjunctive_1987,
  author  = {Hampson, S. E. and Volper, D. J.},
  title   = {Disjunctive Models of Boolean Category Learning},
  journal = {Biological Cybernetics},
  volume  = {56},
  year    = {1987}
}

% PhD thesis. "address" holds the school's city rather than a country/state label.
% The braces in "type" keep sentence-casing styles from lowercasing "PhD Thesis".
@phdthesis{hampson_neural_1983,
  author  = {Hampson, S. E.},
  title   = {A Neural Model of Adaptive Behavior},
  school  = {University of California at Irvine},
  address = {{Irvine, CA}},
  type    = {{PhD Thesis}},
  year    = {1983}
}

% Chapter in an edited book. "volume" identifies which volume of the
% multi-volume series this is; no page range is recorded.
@incollection{holland_escaping_1986,
  author    = {Holland, J. H.},
  title     = {Escaping Brittleness: {{The}} Possibilities of General-Purpose Learning Algorithms Applied to Parallel Rule-Based Systems},
  editor    = {Michalski, R. S. and Carbonell, J. G. and Mitchell, T. M.},
  booktitle = {Machine Learning: {{An}} Artificial Intelligence Approach},
  volume    = {2},
  publisher = {{Morgan Kaufmann}},
  address   = {{Los Altos, CA}},
  year      = {1986}
}

% Journal article. NOTE(review): confirm the page range against the journal issue.
@article{kehoe_temporal_1987,
  author  = {Kehoe, E. J. and Schreurs, B. G. and Graham, P.},
  title   = {Temporal Primacy Overrides Prior Training in Serial Compound Conditioning of the Rabbit's Nictitating Membrane Response},
  journal = {Animal Learning and Behavior},
  volume  = {15},
  pages   = {455--464},
  year    = {1987}
}

% Book (monograph).
@book{kemeny_finite_1976,
  author    = {Kemeny, J. G. and Snell, J. L.},
  title     = {Finite {{Markov}} Chains},
  publisher = {{Springer-Verlag}},
  address   = {{New York}},
  year      = {1976}
}

% Technical report issued by a laboratory (not a commercially published book):
% the laboratory goes in "institution" and its location in "address".
% NOTE(review): confirm the report number against the original report.
@techreport{klopf_neuronal_1987,
  author      = {Klopf, A. H.},
  title       = {A Neuronal Model of Classical Conditioning},
  institution = {{Wright Aeronautical Laboratories}},
  address     = {{Wright-Patterson Air Force Base, OH}},
  number      = {87-1139},
  year        = {1987}
}

% Journal article. The journal title uses the British spelling "Behavioural".
@article{moore_simulation_1986,
  author  = {Moore, J. W. and Desmond, J. E. and Berthier, N. E. and Blazis, D. E. J. and Sutton, R. S. and Barto, A. G.},
  title   = {Simulation of the Classically Conditioned Nictitating Membrane Response by a Neuron-like Adaptive Element: {{Response}} Topography, Neuronal Firing and Interstimulus Intervals},
  journal = {Behavioural Brain Research},
  volume  = {21},
  pages   = {143--154},
  year    = {1986}
}

% Technical report (not a book): the issuing institute goes in "institution"
% and its location in "address".
% NOTE(review): confirm the report number against the original report.
@techreport{rumelhart_learning_1985,
  author      = {Rumelhart, D. E. and Hinton, G. E. and Williams, R. J.},
  title       = {Learning Internal Representations by Error Propagation},
  institution = {{Institute for Cognitive Science, University of California, San Diego}},
  address     = {{La Jolla, CA}},
  number      = {8506},
  year        = {1985}
}

% Journal article. The journal's title is "IBM Journal of Research and Development".
@article{samuel_studies_1959,
  author  = {Samuel, A. L.},
  title   = {Some Studies in Machine Learning Using the Game of Checkers},
  journal = {IBM Journal of Research and Development},
  volume  = {3},
  number  = {3},
  pages   = {210--229},
  year    = {1959}
}

% Journal article; no issue number or page range is recorded for this entry.
@article{sutton_adaptive_1981,
  author  = {Sutton, R. S. and Barto, A. G.},
  title   = {An Adaptive Network That Constructs and Uses an Internal Model of Its Environment},
  journal = {Cognition and Brain Theory},
  volume  = {4},
  year    = {1981}
}

% Conference paper published in proceedings.
@inproceedings{sutton_learning_1985,
  author    = {Sutton, R. S. and Pinette, B.},
  title     = {The Learning of World Models by Connectionist Networks},
  booktitle = {Proceedings of the {{Seventh Annual Conference}} of the {{Cognitive Science Society}}},
  pages     = {54--64},
  publisher = {{Lawrence Erlbaum}},
  address   = {{Irvine, CA}},
  year      = {1985}
}

% PhD thesis. The author is written as in the other Sutton entries so that
% styles treat them as the same person. "address" holds the school's city.
% The abstract is plain LaTeX text: the database's paragraph markers, which
% LaTeX would read as circumflex-accent macros, are dropped and quotations
% use LaTeX quote marks. The scraped subject keywords did not relate to
% this thesis and are omitted.
@phdthesis{sutton_temporal_1984,
  author   = {Sutton, R. S.},
  title    = {Temporal Credit Assignment in Reinforcement Learning},
  school   = {University of Massachusetts},
  address  = {{Amherst, MA}},
  type     = {{PhD Thesis}},
  year     = {1984},
  abstract = {This dissertation describes computational experiments comparing the performance of a range of reinforcement-learning algorithms. The experiments are designed to focus on aspects of the credit-assignment problem having to do with determining when the behavior that deserves credit occurred. The issues of knowledge representation involved in developing new features or refining existing ones are not addressed. The algorithms considered include some from learning automata theory, mathematical learning theory, early ``cybernetic'' approaches to learning, Samuel's checker-playing program, Michie and Chambers's ``Boxes'' system, and a number of new algorithms. The tasks were selected so as to involve, first in isolation and then in combination, the issues of misleading generalizations, delayed reinforcement, unbalanced reinforcement, and secondary reinforcement. The tasks range from simple, ``two-armed bandit'' tasks to a physically realistic pole-balancing task. The results indicate several areas where the algorithms presented here perform substantially better than those previously studied. An unbalanced distribution of reinforcement, misleading generalizations, and delayed reinforcement can greatly retard learning and in some cases even make it counterproductive. Performance can be substantially improved in the presence of these common problems through the use of mechanisms of reinforcement comparison and secondary reinforcement. We present a new algorithm similar to the ``learning-by-generalization'' algorithm used for altering the static evaluation function in Samuel's checker-playing program. Simulation experiments indicate that the new algorithm performs better than a version of Samuel's algorithm suitably modified for reinforcement learning tasks. Theoretical analysis in terms of an ``ideal reinforcement signal'' sheds light on the relationship between these two algorithms and other temporal credit-assignment algorithms.},
  file     = {/home/a/Zotero/storage/K66LXKSX/Sutton_1984_Temporal credit assignment in reinforcement learning.pdf;/home/a/Zotero/storage/D5679KIQ/show.html}
}

% Conference paper published in proceedings.
@inproceedings{sutton_temporal-difference_1987,
  author    = {Sutton, R. S. and Barto, A. G.},
  title     = {A Temporal-Difference Model of Classical Conditioning},
  booktitle = {Proceedings of the {{Ninth Annual Conference}} of the {{Cognitive Science Society}}},
  pages     = {355--378},
  publisher = {{Lawrence Erlbaum}},
  address   = {{Seattle, WA}},
  year      = {1987}
}

% Journal article; no issue number or page range is recorded for this entry.
@article{sutton_toward_1981,
  author  = {Sutton, R. S. and Barto, A. G.},
  title   = {Toward a Modern Theory of Adaptive Networks: {{Expectation}} and Prediction},
  journal = {Psychological Review},
  volume  = {88},
  year    = {1981}
}

% Book (monograph).
@book{varga_matrix_1962,
  author    = {Varga, R. S.},
  title     = {Matrix Iterative Analysis},
  publisher = {{Prentice-Hall}},
  address   = {{Englewood Cliffs, NJ}},
  year      = {1962}
}

% Convention-record paper. The record's "Part IV" is stored as the plain
% number 4 in "volume"; author initials carry periods like the other entries.
@article{widrow_adaptive_1960,
  author  = {Widrow, B. and Hoff, M. E.},
  title   = {Adaptive Switching Circuits},
  journal = {WESCON Convention Record},
  volume  = {4},
  pages   = {96--104},
  year    = {1960},
  file    = {/home/a/Zotero/storage/JC6U3D5V/Widrow_Hoff_1960_Adaptive switching circuits.docx;/home/a/Zotero/storage/Z3FEKRRE/Widrow_Hoff_1960_Adaptive switching circuits.pdf;/home/a/Zotero/storage/Y7S3CSS4/show.html}
}

% Book (monograph).
@book{widrow_adaptive_1985,
  author    = {Widrow, B. and Stearns, S. D.},
  title     = {Adaptive Signal Processing},
  publisher = {{Prentice-Hall}},
  address   = {{Englewood Cliffs, NJ}},
  year      = {1985}
}

% Technical report (not a book): the issuing institute goes in "institution"
% and its location in "address".
% NOTE(review): confirm the report number against the original report.
@techreport{williams_reinforcement_1986,
  author      = {Williams, R. J.},
  title       = {Reinforcement Learning in Connectionist Networks: {{A}} Mathematical Analysis},
  institution = {{Institute for Cognitive Science, University of California, San Diego}},
  address     = {{La Jolla, CA}},
  number      = {8605},
  year        = {1986}
}

% Journal article; no issue number or page range is recorded for this entry.
@article{witten_adaptive_1977,
  author  = {Witten, I. H.},
  title   = {An Adaptive Optimal Controller for Discrete-Time {{Markov}} Environments},
  journal = {Information and Control},
  volume  = {34},
  year    = {1977}
}


